proj_eda Normalized Data

Author

Group 7

Perform EDA (Exploratory Data Analysis) using some Data Visualizations.

merged_df <- read.csv("./all_years/all_yrs.csv")
norm_df <- read.csv("./all_years/all_yrs_norm.csv")
UNITID school_name.x coach_comp_third_2018 support_comp_third_2018 coach_comp_2018 guarantees_2018 total_ops_2018 aid_2018 support_comp_2018 severance_2018 recruiting_2018 teamtravel_2018 equip_2018 game_expenses_2018 marketing_2018 camps_2018 spirit_2018 facilities_rentals_2018 overhead_2018 indirect_institutional_2018 medical_2018 memberships_2018 meals_2018 otherops_2018 bowl_expense_2018 prof_def_2018 bowl_coachcomp_2018 total_expendature.x discounts_to_tuition.x gifts.x reseach_expendatures.x graduation_rate.x school_name.y coach_comp_third_2019 support_comp_third_2019 coach_comp_2019 guarantees_2019 total_ops_2019 aid_2019 support_comp_2019 severance_2019 recruiting_2019 teamtravel_2019 equip_2019 game_expenses_2019 marketing_2019 camps_2019 spirit_2019 facilities_rentals_2019 overhead_2019 indirect_institutional_2019 medical_2019 memberships_2019 meals_2019 otherops_2019 bowl_expense_2019 prof_def_2019 bowl_coachcomp_2019 total_expendature.y discounts_to_tuition.y gifts.y reseach_expendatures.y graduation_rate.y school_name.x.x coach_comp_third_2020 support_comp_third_2020 coach_comp_2020 guarantees_2020 total_ops_2020 aid_2020 support_comp_2020 severance_2020 recruiting_2020 teamtravel_2020 equip_2020 game_expenses_2020 marketing_2020 camps_2020 spirit_2020 facilities_rentals_2020 overhead_2020 indirect_institutional_2020 medical_2020 memberships_2020 meals_2020 otherops_2020 bowl_expense_2020 prof_def_2020 bowl_coachcomp_2020 total_expendature.x.x discounts_to_tuition.x.x gifts.x.x reseach_expendatures.x.x graduation_rate.x.x school_name.y.y coach_comp_third_2021 support_comp_third_2021 coach_comp_2021 guarantees_2021 total_ops_2021 aid_2021 support_comp_2021 severance_2021 recruiting_2021 teamtravel_2021 equip_2021 game_expenses_2021 marketing_2021 camps_2021 spirit_2021 facilities_rentals_2021 overhead_2021 indirect_institutional_2021 medical_2021 memberships_2021 meals_2021 otherops_2021 bowl_expense_2021 prof_def_2021 bowl_coachcomp_2021 total_expendature.y.y discounts_to_tuition.y.y gifts.y.y reseach_expendatures.y.y graduation_rate.y.y school_name coach_comp_third_2022 support_comp_third_2022 coach_comp_2022 guarantees_2022 total_ops_2022 aid_2022 support_comp_2022 severance_2022 recruiting_2022 teamtravel_2022 equip_2022 game_expenses_2022 marketing_2022 camps_2022 spirit_2022 facilities_rentals_2022 overhead_2022 indirect_institutional_2022 medical_2022 memberships_2022 meals_2022 otherops_2022 bowl_expense_2022 prof_def_2022 bowl_coachcomp_2022 total_expendature discounts_to_tuition gifts reseach_expendatures graduation_rate
199139 University of North Carolina at Charlotte 0 0 37.6084 2.9891 242.5071 52.6928 30.7106 0.7800 4.3619 18.8738 8.0816 10.2208 6.0467 0.0000 1.0905 16.1160 18.4733 18.3615 2.3835 2.6468 2.3096 8.7605 0.0000 11.7992 0.000 496.8135 350.5052 79.1712 205.3767 0.1894 University of North Carolina at Charlotte 0 0 46.0872 2.8997 271.6001 56.0832 38.5333 2.5983 5.3454 19.2602 8.6700 10.8250 6.0144 0.0000 1.2671 16.9183 18.7239 22.9167 2.6851 2.7974 2.3938 7.5812 0.0000 17.0372 0.0000 560.2375 408.7012 140.7289 223.2607 0.1950 University of North Carolina at Charlotte 0 0 52.2119 4.9678 288.2604 54.6535 38.5716 2.9587 5.1489 19.0337 11.7427 10.2088 5.0488 0.0000 0.9782 17.7532 19.1325 24.7843 2.5789 3.0881 2.1140 10.3439 2.2147 12.6100 0.7262 589.1308 426.4901 91.8382 238.1076 0.2008 University of North Carolina at Charlotte 0 0 49.3748 0.7440 276.3402 59.5972 36.1769 0.7783 1.0136 13.7193 12.9504 4.8375 2.9154 0.0000 0.6045 17.6107 18.0663 21.0953 21.9792 2.5678 3.0147 9.2944 0 NA 0 552.6804 438.2813 84.8773 290.3364 0.2160 University of North Carolina at Charlotte 0 0 55.9542 4.6309 299.9096 52.9917 44.0449 0.0488 5.5407 26.4480 10.7449 12.2512 4.5430 0.0000 1.6601 18.2134 18.9926 25.7351 2.7753 3.0140 4.7238 7.5973 0.0000 14.3229 0.0000 614.1422 438.1081 98.0369 215.8536 0.2338
104151 Arizona State University Campus Immersion 12500 0 45.8938 3.9856 286.5229 35.5016 43.5892 29.0665 3.4887 13.0890 14.3161 9.4625 11.1612 0.0491 1.2376 34.0412 20.4243 4.3900 3.6847 0.0983 4.1739 4.8612 2.8082 -29.7086 1.172 543.3373 845.4414 171.2845 752.6091 0.0920 Arizona State University Campus Immersion 0 0 44.2249 3.2532 266.4233 37.2985 46.2299 0.2714 3.6387 13.7295 13.1854 9.2089 11.7766 0.0000 1.8137 25.1888 37.0526 4.1219 3.4570 0.1317 3.9604 4.4047 2.8763 7.4129 0.5994 540.2595 905.7922 186.1542 812.3945 0.0999 Arizona State University Campus Immersion 5000 0 46.2556 6.0617 257.3785 36.7406 47.1595 2.8717 2.9295 12.0202 12.4221 9.5262 9.6062 0.0000 1.3195 25.9147 24.3477 3.8565 4.0494 0.1214 3.6645 4.8243 2.8417 NA 0.8347 514.7570 1010.0408 167.6446 852.7580 0.1467 Arizona State University Campus Immersion 5000 0 34.3570 0.3197 159.5013 26.6688 32.9522 0.7705 0.5848 5.9418 7.6118 3.6735 1.8585 0.0000 0.3620 16.7592 16.5540 2.7123 3.6223 0.0537 2.4901 2.2013 0 21.3690 0 340.3716 824.0440 117.3291 637.9806 0.1099 Arizona State University Campus Immersion 5000 0 36.7206 2.5096 185.2749 25.5507 34.8055 0.5418 1.8960 9.0210 8.0303 9.7276 6.0060 0.0000 0.6536 18.1995 14.4115 3.4443 3.5373 0.0924 3.2742 3.7599 2.7571 NA 0.3286 370.5498 876.2341 114.4475 683.2726 0.1079
230728 Utah State University 643515 0 28.9837 2.5594 164.6238 25.9367 19.6846 1.5067 2.4971 12.6717 5.2653 3.9487 2.5891 0.0000 0.0864 24.3834 3.6559 8.2367 1.7304 2.1896 2.8463 9.4408 2.8851 -1.5281 0.815 327.7195 361.2401 80.7451 756.1246 0.0756 Utah State University 592917 0 33.3670 3.6698 170.5178 26.7240 18.5727 1.1157 2.3268 12.6188 5.1281 3.5270 2.4647 0.0000 0.0697 26.1098 3.2986 9.4312 2.1649 2.1993 2.8253 9.1589 2.4587 3.1749 0.8087 344.2105 384.3851 104.3401 967.1165 0.0827 Utah State University 681250 0 34.9412 2.9694 170.7592 27.2176 19.6868 1.6056 2.0146 13.2278 6.6037 3.6020 2.0564 0.0000 0.0680 19.4534 3.7326 10.5997 3.6285 2.3302 2.5995 7.6358 2.9544 NA 0.9381 341.5185 374.7711 91.4608 1040.0575 0.0860 Utah State University 468750 0 37.2324 0.2066 153.1890 28.8647 20.6053 2.5210 0.4883 7.7984 5.0243 2.1563 1.0618 0.0000 0.0000 17.7481 2.6906 8.8183 3.3512 1.8470 2.1295 8.6532 0 3.3751 0 309.7531 388.1709 123.3266 1085.3170 0.1033 Utah State University 615887 0 41.7977 3.1118 184.7208 30.2111 23.0583 0.0000 3.3176 15.3174 6.5673 5.0228 1.9147 0.0000 0.0690 15.5237 3.3494 8.0400 3.3684 2.4717 3.2543 10.9671 3.5655 1.4392 1.1364 370.8807 406.6803 184.8010 1155.7511 0.1001
110635 University of California-Berkeley 0 0 94.7291 9.4670 559.9209 65.9807 104.0745 7.1333 7.0773 32.1560 31.1610 23.1575 19.8679 0.0000 0.7952 104.8157 25.7552 0.0000 11.9915 0.4625 7.8440 13.4526 0.0000 -97.8007 0.000 1022.0412 1005.3896 1599.6485 3423.4898 0.1053 University of California-Berkeley 0 0 104.6133 6.7396 516.7996 58.1813 106.5069 4.0517 8.5717 33.9408 21.8693 25.3109 23.2661 0.0000 0.7928 53.8087 26.6216 0.0000 12.8684 0.1276 4.9214 20.2082 3.4305 NA 0.9689 1033.5991 991.9017 1544.7587 3211.0850 0.1108 University of California-Berkeley 0 0 99.2056 5.2347 479.8975 51.3990 111.9221 5.2564 6.7153 26.1962 25.0012 24.0883 20.9564 0.0000 1.0849 43.8998 22.4038 0.0000 11.3223 0.2656 9.3301 13.5040 1.4203 15.1521 0.6915 974.9471 1013.1897 1540.0773 3263.4672 0.1010 University of California-Berkeley 0 0 109.8929 1.0012 442.7973 54.0061 112.7593 4.8814 1.8238 20.7048 23.3729 6.5183 9.4563 0.0000 0.0099 44.2207 15.3756 0.0000 13.6222 0.4078 6.7051 18.0392 NA 17.2954 0 902.8900 1198.0979 274.4681 3505.5062 0.1061 University of California-Berkeley 0 0 114.8500 8.5324 545.4876 62.5541 122.4546 2.6376 9.8075 38.6766 27.2219 23.7132 20.2260 0.0000 0.5349 45.0169 25.2711 0.0000 15.0387 0.5890 10.9084 17.4548 0.0000 17.7547 0.0000 1108.7300 1403.8958 1866.4224 3684.6140 0.0930
203517 Kent State University at Kent 0 0 31.2298 2.7567 144.0878 30.6634 23.9207 0.0000 2.7993 14.2172 8.5730 3.1329 5.2735 2.1600 0.6536 1.1767 3.2656 0.0000 0.7915 1.4012 0.4449 11.6278 0.0000 -5.7816 0.000 282.3940 344.6302 39.3381 79.6657 0.0368 Kent State University at Kent 0 0 29.7679 2.2981 143.6486 31.3794 25.1836 0.0000 3.3160 15.6979 10.1725 3.7700 5.0160 2.4211 0.9401 0.0000 3.4241 0.0000 1.2140 0.2437 0.5339 8.2701 0.0000 NA 0.0000 287.2973 374.1530 48.4714 102.4385 0.0452 Kent State University at Kent 0 0 30.3060 1.9892 142.8182 33.1480 27.1937 0.0000 2.4610 14.2244 8.5900 3.2317 4.2812 1.7596 0.4722 1.7581 3.5765 0.0000 2.0972 1.5248 0.4159 3.6640 2.1247 NA 0.0000 285.6365 384.9414 5.5103 119.6426 0.0554 Kent State University at Kent 0 0 31.8172 0.2071 105.3177 30.1091 20.4221 0.0000 0.4211 5.8407 5.0451 1.3099 0.6934 0.0603 0.3179 1.7155 0.0000 0.0000 1.6573 1.0945 0.0000 4.6066 0 NA 0 210.6355 371.1401 22.2887 107.2095 0.0572 Kent State University at Kent 0 0 32.3973 2.2728 148.6267 38.3760 26.1164 0.0000 2.9433 17.0268 7.2225 3.0836 3.2131 1.6284 0.2030 0.0000 3.6370 0.0000 2.2904 2.5105 0.7273 3.2513 1.7271 NA 0.0000 297.2534 479.6363 37.1558 127.0126 0.0623
# Gather data into long format for easier plotting
long_data <- norm_df %>%
  select(UNITID, total_ops_2018, total_ops_2019, total_ops_2020, total_ops_2021, total_ops_2022) %>%
  pivot_longer(cols = starts_with("total_ops"), 
               names_to = "year", 
               values_to = "total_ops") %>%
  mutate(year = gsub("total_ops_", "", year)) 

# Summarize total athletic spending by year
summary_data <- long_data %>%
  group_by(year) %>%
  summarize(mean_total_ops = mean(total_ops, na.rm = TRUE))

# Create barplot with revised units
ggplot(data = summary_data, aes(x = year, y = mean_total_ops, fill = year)) +
  geom_bar(stat = "identity", alpha = 0.8, color = "black") +
  labs(title = "Average Per-Student Athletic Spending Across Years (2018–2022)",
       x = "Year",
       y = "Mean Athletic Spending Per Student (USD)",
       fill = "Year") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set3")

The plot shows the mean total athletic spending from 2018 to 2022, highlighting a decline in 2021, possibly due to disruptions like the COVID-19 pandemic. Spending increased again in 2022, surpassing previous years, indicating a recovery or increased investment in athletics.

Make a Means dataframe for means across 5 years of the variables

# Create a dataframe for calculating means for the specified columns
means_df <- norm_df %>%
  rowwise() %>%
  mutate(
    coach_comp_third = mean(c(coach_comp_third_2018, coach_comp_third_2019, coach_comp_third_2020, coach_comp_third_2021, coach_comp_third_2022), na.rm = TRUE),
    support_comp_third = mean(c(support_comp_third_2018, support_comp_third_2019, support_comp_third_2020, support_comp_third_2021, support_comp_third_2022), na.rm = TRUE),
    coach_comp = mean(c(coach_comp_2018, coach_comp_2019, coach_comp_2020, coach_comp_2021, coach_comp_2022), na.rm = TRUE),
    guarantees = mean(c(guarantees_2018, guarantees_2019, guarantees_2020, guarantees_2021, guarantees_2022), na.rm = TRUE),
    total_ops = mean(c(total_ops_2018, total_ops_2019, total_ops_2020, total_ops_2021, total_ops_2022), na.rm = TRUE),
    aid = mean(c(aid_2018, aid_2019, aid_2020, aid_2021, aid_2022), na.rm = TRUE),
    support_comp = mean(c(support_comp_2018, support_comp_2019, support_comp_2020, support_comp_2021, support_comp_2022), na.rm = TRUE),
    severance = mean(c(severance_2018, severance_2019, severance_2020, severance_2021, severance_2022), na.rm = TRUE),
    recruiting = mean(c(recruiting_2018, recruiting_2019, recruiting_2020, recruiting_2021, recruiting_2022), na.rm = TRUE),
    team_travel = mean(c(teamtravel_2018, teamtravel_2019, teamtravel_2020, teamtravel_2021, teamtravel_2022), na.rm = TRUE),
    equip = mean(c(equip_2018, equip_2019, equip_2020, equip_2021, equip_2022), na.rm = TRUE),
    game_expenses = mean(c(game_expenses_2018, game_expenses_2019, game_expenses_2020, game_expenses_2021, game_expenses_2022), na.rm = TRUE),
    marketing = mean(c(marketing_2018, marketing_2019, marketing_2020, marketing_2021, marketing_2022), na.rm = TRUE),
    camps = mean(c(camps_2018, camps_2019, camps_2020, camps_2021, camps_2022), na.rm = TRUE),
    spirits = mean(c(spirit_2018, spirit_2019, spirit_2020, spirit_2021, spirit_2022), na.rm = TRUE),
    facilities = mean(c(facilities_rentals_2018, facilities_rentals_2019, facilities_rentals_2020, facilities_rentals_2021, facilities_rentals_2022), na.rm = TRUE),
    overhead = mean(c(overhead_2018, overhead_2019, overhead_2020, overhead_2021, overhead_2022), na.rm = TRUE),
    indirect_institutional = mean(c(indirect_institutional_2018, indirect_institutional_2019, indirect_institutional_2020, indirect_institutional_2021, indirect_institutional_2022), na.rm = TRUE),
    medical = mean(c(medical_2018, medical_2019, medical_2020, medical_2021, medical_2022), na.rm = TRUE),
    membership = mean(c(memberships_2018, memberships_2019, memberships_2020, memberships_2021, memberships_2022), na.rm = TRUE),
    meals = mean(c(meals_2018, meals_2019, meals_2020, meals_2021, meals_2022), na.rm = TRUE),
    otherops = mean(c(otherops_2018, otherops_2019, otherops_2020, otherops_2021, otherops_2022), na.rm = TRUE),
    bowl_expenses = mean(c(bowl_expense_2018, bowl_expense_2019, bowl_expense_2020, bowl_expense_2021, bowl_expense_2022), na.rm = TRUE),
    prof_def = mean(c(prof_def_2018, prof_def_2019, prof_def_2020, prof_def_2021, prof_def_2022), na.rm = TRUE),
    bowl_coachcomp = mean(c(bowl_coachcomp_2018, bowl_coachcomp_2019, bowl_coachcomp_2020, bowl_coachcomp_2021, bowl_coachcomp_2022), na.rm = TRUE),
    total_expenditure = mean(c(total_expendature.x, total_expendature.y, total_expendature.x.x, total_expendature.y.y), na.rm = TRUE),
    discounts_to_tuition = mean(c(discounts_to_tuition.x, discounts_to_tuition.y, discounts_to_tuition.x.x, discounts_to_tuition.y.y), na.rm = TRUE),
    gifts = mean(c(gifts.x, gifts.y, gifts.x.x, gifts.y.y), na.rm = TRUE),
    research_expenditure = mean(c(reseach_expendatures.x, reseach_expendatures.y, reseach_expendatures.x.x, reseach_expendatures.y.y), na.rm = TRUE),
    graduation_rate = mean(c(graduation_rate.x, graduation_rate.y, graduation_rate.x.x, graduation_rate.y.y), na.rm = TRUE)
  ) %>%
  ungroup() %>%
  select(UNITID, school_name.x, coach_comp_third, support_comp_third, coach_comp, guarantees, total_ops, aid, 
         support_comp, severance, recruiting, team_travel, equip, game_expenses, marketing, camps, spirits, facilities,
         overhead, indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def, bowl_coachcomp,
         total_expenditure, discounts_to_tuition, gifts, research_expenditure, graduation_rate)
colnames(means_df)[colnames(means_df) == "school_name.x"] <- "school_name"
sample_n(means_df, 5)
UNITID school_name coach_comp_third support_comp_third coach_comp guarantees total_ops aid support_comp severance recruiting team_travel equip game_expenses marketing camps spirits facilities overhead indirect_institutional medical membership meals otherops bowl_expenses prof_def bowl_coachcomp total_expenditure discounts_to_tuition gifts research_expenditure graduation_rate
237525 Marshall University 0 0.00000 26.73354 3.51122 152.38260 34.40386 22.08134 0.80580 2.80142 15.34270 5.16592 4.77868 3.45652 0.45678 0.64628 4.52558 5.54316 4.69456 5.52618 1.73744 0.18344 6.65998 2.60452 -2.505900 0.72370 285.1008 186.8722 5.912425 78.97922 0.190750
240727 University of Wyoming 143840 0.10232 25.07428 2.84374 126.50644 20.49662 25.50908 0.12744 2.06770 8.46388 3.08884 3.22246 14.57676 0.00000 0.23294 0.93744 3.97162 1.48634 3.09566 1.46486 3.31734 4.35856 1.27412 6.851033 0.37874 256.3442 117.1760 84.771600 280.57642 0.125425
147703 Northern Illinois University 0 0.00000 14.10728 1.25626 72.19782 21.71176 8.94982 0.02150 0.76152 6.22568 2.59596 2.00182 1.78804 0.32068 0.16468 1.86960 2.05304 1.48158 1.52224 0.59768 0.47790 2.98002 1.15786 1.889800 0.15284 145.3225 174.9625 0.000000 66.29958 0.065225
214777 Pennsylvania State University-Main Campus 0 0.00000 282.41374 22.03748 1381.10088 190.33044 251.90858 4.59506 21.67246 65.59184 32.79056 78.15078 13.18416 0.00000 2.93644 147.07454 103.22010 0.00000 17.09872 0.84410 16.72766 107.18032 19.49210 71.272875 3.85174 2683.2919 NaN NaN NaN 0.174325
201885 University of Cincinnati-Main Campus 0 0.00000 61.74038 5.19000 325.88182 47.24372 38.85760 1.05278 3.76404 19.82102 3.08242 9.23016 15.37966 0.00000 1.09138 59.35060 6.50934 10.20676 2.79560 0.50214 3.52860 28.41996 6.39024 17.070175 1.72552 641.8050 497.5329 286.496475 956.01500 0.111725
# Select relevant columns for analysis
two_groups_df <- means_df %>%
  select(total_ops, discounts_to_tuition, gifts, graduation_rate, research_expenditure)

# Scale 'total_ops' into spending groups (Low, High)
two_groups_df <- two_groups_df %>%
  mutate(total_ops_group = cut(
    total_ops,
    breaks = quantile(total_ops, probs = seq(0, 1, 0.5), na.rm = TRUE),
    labels = c("Low", "High"),
    include.lowest = TRUE
  ))

two_groups_df <- two_groups_df %>% filter(!is.na(total_ops_group))


# Select relevant columns for analysis
three_groups_df <- means_df %>%
  select(total_ops, discounts_to_tuition, gifts, graduation_rate, research_expenditure)

# Scale 'total_ops' into spending groups (Low, Medium, High)
three_groups_df <- three_groups_df %>%
  mutate(total_ops_group = cut(
    total_ops,
    breaks = quantile(total_ops, probs = seq(0, 1, 0.33), na.rm = TRUE),
    labels = c("Low", "Medium",  "High"),
    include.lowest = TRUE
  ))

three_groups_df <- three_groups_df %>% filter(!is.na(total_ops_group))
dependent_vars <- c("discounts_to_tuition", "gifts", "graduation_rate", "research_expenditure")
independent_var <- "total_ops_group"

two_groups_subset <- two_groups_df[, c(dependent_vars, independent_var)]
two_groups_subset <- na.omit(two_groups_subset)

three_groups_subset <- three_groups_df[, c(dependent_vars, independent_var)]
three_groups_subset <- na.omit(three_groups_subset)
head(three_groups_df, 5)
total_ops discounts_to_tuition gifts graduation_rate research_expenditure total_ops_group
235.10310 518.9550 270.18552 0.391975 2108.97807 Medium
539.96740 822.0061 148.70020 0.129600 278.65440 High
791.66188 706.2327 257.76210 0.055350 1227.54595 High
85.87798 101.6097 42.18203 0.213800 99.05178 Low
231.02018 896.3296 160.60310 0.112125 763.93555 Medium
# Calculate group means
group_means <- three_groups_df %>%
  group_by(total_ops_group) %>%
  summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))
Warning: There was 1 warning in `summarize()`.
ℹ In argument: `across(...)`.
ℹ In group 1: `total_ops_group = Low`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
# Barplot for means
ggplot(group_means, aes(x = total_ops_group, y = discounts_to_tuition, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Discounts to Tuition by Athletic Spending Group", x = "Spending Group", y = "Mean Discounts to Tuition Per Student (USD)") +
  theme_minimal()

ggplot(group_means, aes(x = total_ops_group, y = gifts, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Gifts by Athletic Spending Group", x = "Spending Group", y = "Mean Gifts Per Student (USD)") +
  theme_minimal()

ggplot(group_means, aes(x = total_ops_group, y = research_expenditure, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Research Expenditure by Athletic Spending Group", x = "Spending Group", y = "Mean Research Expenditure Per Student USD") +
  theme_minimal()

ggplot(group_means, aes(x = total_ops_group, y = graduation_rate, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Graduation Rate by Athletic Spending Group", x = "Spending Group", y = "Mean Graduation Rate") +
  theme_minimal()

# Select relevant columns for analysis
three_groups_df_1 <- means_df %>%
  select(coach_comp, discounts_to_tuition, gifts, graduation_rate, research_expenditure)

# Scale 'coach_comp' into spending groups (Low, Medium, High)
three_groups_df_1 <- three_groups_df_1 %>%
  mutate(coach_comp_group = cut(
    coach_comp,
    breaks = quantile(coach_comp, probs = seq(0, 1, 0.33), na.rm = TRUE),
    labels = c("Low", "Medium", "High"),
    include.lowest = TRUE
  ))

# Filter out rows with missing coach_comp_group
three_groups_df_1 <- three_groups_df_1 %>% filter(!is.na(coach_comp_group))
# Calculate group means
group_means_1 <- three_groups_df_1 %>%
  group_by(coach_comp_group) %>%
  summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))
# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = discounts_to_tuition, fill = coach_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Discounts to Tuition by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Discounts to Tuition Per Student (USD)") +
  theme_minimal()

# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = gifts, fill = coach_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Gifts by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Gifts Per Student (USD)") +
  theme_minimal()

# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = research_expenditure, fill = coach_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Research Expenditure by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Research Expenditure Per Student (USD)") +
  theme_minimal()

# Barplot for means
ggplot(group_means_1, aes(x = coach_comp_group, y = graduation_rate, fill = coach_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Graduation Numbers by Coach Compensation Group", x = "Coach Compensation Group", y = "Mean Graduation Numbers") +
  theme_minimal()

# Select relevant columns for analysis
three_groups_df_2 <- means_df %>%
  select(support_comp, discounts_to_tuition, gifts, graduation_rate, research_expenditure)

# Scale 'support_comp' into spending groups (Low, Medium, High)
three_groups_df_2 <- three_groups_df_2 %>%
  mutate(support_comp_group = cut(
    support_comp,
    breaks = quantile(support_comp, probs = seq(0, 1, 0.33), na.rm = TRUE),
    labels = c("Low", "Medium", "High"),
    include.lowest = TRUE
  ))

# Filter out rows with missing coach_comp_group
three_groups_df_2 <- three_groups_df_2 %>% filter(!is.na(support_comp_group))
head(three_groups_df_2)
support_comp discounts_to_tuition gifts graduation_rate research_expenditure support_comp_group
37.36655 518.9550 270.18552 0.391975 2108.97807 Medium
96.57464 822.0061 148.70020 0.129600 278.65440 High
145.34364 706.2327 257.76210 0.055350 1227.54595 High
13.02626 101.6097 42.18203 0.213800 99.05178 Low
40.94726 896.3296 160.60310 0.112125 763.93555 Medium
72.48252 1051.1068 352.52637 0.048650 2216.03515 High
# Calculate group means
group_means_2 <- three_groups_df_2 %>%
  group_by(support_comp_group) %>%
  summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))
# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = discounts_to_tuition, fill = support_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Discounts to Tuition by Support Compensation Group", x = "Support Compensation Group", y = "Mean Discounts to Tuition Per Student (USD)") +
  theme_minimal()

# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = gifts, fill = support_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Gifts by Support Compensation Group", x = "Support Compensation Group", y = "Mean Gifts Per Student (USD)") +
  theme_minimal()

# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = research_expenditure, fill = support_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Research Expenditure by Support Compensationn Group", x = "Support Compensation Group", y = "Mean Research Expenditure Per Student (USD)") +
  theme_minimal()

# Barplot for means
ggplot(group_means_2, aes(x = support_comp_group, y = graduation_rate, fill = support_comp_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Graduation Numbers by Support Compensation Group", x = "Support Compensation Group", y = "Mean Graduation Numbers") +
  theme_minimal()

# Compute correlations
correlation_matrix <- three_groups_df %>%
  select(discounts_to_tuition, gifts, graduation_rate, research_expenditure) %>%
  cor(use = "complete.obs")

# Heatmap
melted_corr <- melt(correlation_matrix)
ggplot(melted_corr, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab") +
  theme_minimal() +
  labs(title = "Correlation Heatmap", x = "", y = "")

Variables with strong correlations (e.g., discounts_to_tuition and gifts) might be influencing each other and could provide insights when analyzed further. Weaker correlations (e.g., involving graduation_rate) suggest these variables may be independent or have limited direct relationships with others.

# Violin plot 
ggplot(three_groups_df, aes(x = total_ops_group, y = discounts_to_tuition, fill = total_ops_group)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, fill = "white") +
  labs(title = "Tuition Discounts by Athletic Spending Group", x = "Athletic Spending Group", y = "discounts to tuition") +
  theme_minimal()

ggplot(three_groups_df, aes(x = total_ops_group, y = gifts, fill = total_ops_group)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, fill = "white") +
  labs(title = "Gifts by Athletic Spending Group", x = "Athletic Spending Group", y = "gifts") +
  theme_minimal()

ggplot(three_groups_df, aes(x = total_ops_group, y = research_expenditure, fill = total_ops_group)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, fill = "white") +
  labs(title = "Research Expenditure by Athletic Spending Group", x = "Athletic Spending Group", y = "Research Expenditure") +
  theme_minimal()

ggplot(three_groups_df, aes(x = total_ops_group, y = graduation_rate, fill = total_ops_group)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.2, fill = "white") +
  labs(title = "Graduation Rate by Athletic Spending Group", x = "Athletic Spending Group", y = "Graduation Rate") +
  theme_minimal()

head(two_groups_df, 5)
total_ops discounts_to_tuition gifts graduation_rate research_expenditure total_ops_group
235.10310 518.9550 270.18552 0.391975 2108.97807 Low
539.96740 822.0061 148.70020 0.129600 278.65440 High
791.66188 706.2327 257.76210 0.055350 1227.54595 High
85.87798 101.6097 42.18203 0.213800 99.05178 Low
231.02018 896.3296 160.60310 0.112125 763.93555 Low
# Calculate group means
group_means <- two_groups_df %>%
  group_by(total_ops_group) %>%
  summarize(across(c(discounts_to_tuition, gifts, graduation_rate, research_expenditure), mean, na.rm = TRUE))

# Barplot for means: Discounts to Tuition
ggplot(group_means, aes(x = total_ops_group, y = discounts_to_tuition, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Discounts to Tuition by Athletic Spending Group (Two Groups)", 
       x = "Spending Group", 
       y = "Mean Discounts to Tuition") +
  theme_minimal()

# Barplot for means: Gifts
ggplot(group_means, aes(x = total_ops_group, y = gifts, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Gifts by Athletic Spending Group (Two Groups)", 
       x = "Spending Group", 
       y = "Mean Gifts") +
  theme_minimal()

# Barplot for means: Research Expenditure
ggplot(group_means, aes(x = total_ops_group, y = research_expenditure, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Research Expenditure by Athletic Spending Group (Two Groups)", 
       x = "Spending Group", 
       y = "Mean Research Expenditure") +
  theme_minimal()

# Barplot for means: Graduation Rate
ggplot(group_means, aes(x = total_ops_group, y = graduation_rate, fill = total_ops_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Mean Graduation Rate by Athletic Spending Group (Two Groups)", 
       x = "Spending Group", 
       y = "Mean Graduation Rate") +
  theme_minimal()

Which Schools has the most spending? Which schools has the least athletic spending?

# Select top 20 schools by total expenditure
top_schools <- means_df %>%
  arrange(desc(total_ops)) %>%
  slice_head(n = 20)

# Set the order of `school_name` based on the ranking of total expenditure
top_schools <- top_schools %>%
  mutate(school_name = factor(school_name, levels = school_name[order(desc(total_ops))]))

# Ensure the school names are ranked by total_ops
top_schools <- top_schools %>%
  mutate(school_name.x = factor(school_name, levels = school_name[order(desc(total_ops))]))

# Barplot for top 20 schools expenditures (normalized per student)
ggplot(top_schools, aes(x = reorder(school_name, total_ops), y = total_ops)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
  coord_flip() +  
  labs(title = "Sports Expenditure Top 20 schools",
       x = "School Name",
       y = "Sports Expenditure Per Student (USD)",
       caption = "Data: Means across 2018–2022 (Normalized Per Student)") +
  theme_minimal()

# Select bottom 20 schools by total expenditure
least_spendy_schools <- means_df %>%
  arrange(total_ops) %>%  # Arrange in ascending order
  slice_head(n = 20)  # Select the first 20 rows

# Ensure the school names are ranked by total expenditure (descending order)
least_spendy_schools <- least_spendy_schools %>%
  mutate(school_name.x = factor(school_name, levels = school_name[order(desc(total_ops))]))

# Barplot for 20 least spendy schools expenditures
ggplot(least_spendy_schools, aes(x = reorder(school_name.x, -total_ops), y = total_ops)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
  coord_flip() +  # Flip the coordinates for a horizontal bar chart
  labs(title = "Sports Expenditure 20 Least Spendy Schools",
       x = "School Name",
       y = "Sports Expenditure Per Student (USD)",
       caption = "Data: Means across 2018–2022 (Normalized Per Student)") +
  theme_minimal()

ggplot(least_spendy_schools, aes(x = school_name, y = discounts_to_tuition)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
  coord_flip() +  
  labs(
    title = "Tuition Discounts for Bottom 20 Schools",
    x = "School Name",
    y = "Tuition Discounts",
    caption = "Data: Means across 2018-2022"
  ) +
  theme_minimal()

ggplot(least_spendy_schools, aes(x = school_name, y = gifts)) +
  geom_bar(stat = "identity", fill = "orange", alpha = 0.8) +
  coord_flip() +
  labs(title = "Gifts for Bottom 20 Schools",
       x = "School Name",
       y = "Gifts",
       caption = "Data: Means across 2018-2022")

ggplot(least_spendy_schools, aes(x = school_name, y = research_expenditure)) +
  geom_bar(stat = "identity", fill = "darkgreen", alpha = 0.8) +
  coord_flip() +
  labs(title = "Research Expenditure for Bottom 20 Schools",
       x = "School Name",
       y = "Research Expenditure",
       caption = "Data: Means across 2018-2022")

ggplot(least_spendy_schools, aes(x = school_name, y = graduation_rate)) +
  geom_bar(stat = "identity", fill = "purple", alpha = 0.8) +
  coord_flip() +
  labs(title = "Graduation Rate for Bottom 20 Schools",
       x = "School Name",
       y = "Graduation Rate",
       caption = "Data: Means across 2018-2022")

ggplot(top_schools, aes(x = school_name, y = discounts_to_tuition)) +
  geom_bar(stat = "identity", fill = "steelblue", alpha = 0.8) +
  coord_flip() +  
  labs(
    title = "Tuition Discounts Top 20 Schools",
    x = "School Name",
    y = "Tuition Discounts Per Student (USD)",
    caption = "Data: Means across 2018-2022"
  ) +
  theme_minimal()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).

ggplot(top_schools, aes(x = school_name, y = gifts)) +
  geom_bar(stat = "identity", fill = "orange", alpha = 0.8) +
  coord_flip() +
  labs(title = "Gifts for Top 20 Schools",
       x = "School Name",
       y = "Gifts Per Student (USD)",
       caption = "Data: Means across 2018-2022")
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).

ggplot(top_schools, aes(x = school_name, y = research_expenditure)) +
  geom_bar(stat = "identity", fill = "darkgreen", alpha = 0.8) +
  coord_flip() +
  labs(title = "Research Expenditure for Top 20 Schools",
       x = "School Name",
       y = "Research Expenditure Per Student (USD)",
       caption = "Data: Means across 2018-2022")
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).

ggplot(top_schools, aes(x = school_name, y = graduation_rate)) +
  geom_bar(stat = "identity", fill = "purple", alpha = 0.8) +
  coord_flip() +
  labs(title = "Graduation Numbers for Top 20 Schools",
       x = "School Name",
       y = "Normalized Number of Graduates",
       caption = "Data: Means across 2018-2022")

# Select relevant numeric columns
correlation_data <- means_df %>%
  select(total_ops, gifts, research_expenditure, graduation_rate, discounts_to_tuition)

# Compute correlation matrix
correlation_matrix <- round(cor(correlation_data, use = "complete.obs"), 2)

# Convert to long format for heatmap
library(reshape2)
correlation_long <- melt(correlation_matrix)

# Heatmap with enhanced readability
ggplot(correlation_long, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = value), color = "black", size = 4) +  # Add correlation values
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0,
                       limits = c(-1, 1), name = "Correlation") +
  labs(title = "Correlation Between Spending and Benefits",
       x = "",
       y = "",
       fill = "Correlation") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),  # Rotate x-axis labels
        axis.text.y = element_text(size = 10))

Correlation between each dependent variable & all indp. variables

# Independent Variables vs discounts_to_tuition
selected_data <- means_df %>%
  select(coach_comp_third, support_comp, coach_comp, guarantees, severance, recruiting, 
         team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead, 
         indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def, 
         bowl_coachcomp, discounts_to_tuition)

# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")

# Subset correlations for discounts_to_tuition
correlation_subset <- correlation_matrix[1:22, "discounts_to_tuition", drop = FALSE]

# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
  rownames_to_column(var = "Independent_Variable") %>%
  rename(Correlation = discounts_to_tuition)

# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation), 
                              y = "discounts_to_tuition", fill = Correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
  labs(title = "Correlation: Independent Variables vs Discounts to Tuition",
       x = "Independent Variables",
       y = "Discounts to Tuition (Dependent Variable)",
       fill = "Correlation") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Independent Variables vs gifts
selected_data <- means_df %>%
  select(coach_comp_third, support_comp, coach_comp, guarantees, aid, severance, recruiting, 
         team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead, 
         indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def, 
         bowl_coachcomp, gifts)

# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")

# Subset correlations for gifts
correlation_subset <- correlation_matrix[1:23, "gifts", drop = FALSE]

# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
  rownames_to_column(var = "Independent_Variable") %>%
  rename(Correlation = gifts)

# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation), 
                              y = "gifts", fill = Correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
  labs(title = "Correlation: Independent Variables vs Gifts",
       x = "Independent Variables",
       y = "Gifts (Dependent Variable)",
       fill = "Correlation") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Independent Variables vs research_expenditure
selected_data <- means_df %>%
  select(coach_comp_third, support_comp, coach_comp, guarantees, aid, severance, recruiting, 
         team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead, 
         indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def, 
         bowl_coachcomp, research_expenditure)

# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")

# Subset correlations for research_expenditure
correlation_subset <- correlation_matrix[1:23, "research_expenditure", drop = FALSE]

# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
  rownames_to_column(var = "Independent_Variable") %>%
  rename(Correlation = research_expenditure)

# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation), 
                              y = "research_expenditure", fill = Correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
  labs(title = "Correlation: Independent Variables vs Research Expenditure",
       x = "Independent Variables",
       y = "Research Expenditure (Dependent Variable)",
       fill = "Correlation") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Independent Variables vs graduation_rate
selected_data <- means_df %>%
  select(coach_comp_third, support_comp, coach_comp, guarantees, aid, severance, recruiting, 
         team_travel, equip, game_expenses, marketing, camps, spirits, facilities, overhead, 
         indirect_institutional, medical, membership, meals, otherops, bowl_expenses, prof_def, 
         bowl_coachcomp, graduation_rate)

# Compute correlation matrix
correlation_matrix <- cor(selected_data, use = "complete.obs")

# Subset correlations for graduation_rate
correlation_subset <- correlation_matrix[1:23, "graduation_rate", drop = FALSE]

# Convert to long format
correlation_long <- as.data.frame(correlation_subset) %>%
  rownames_to_column(var = "Independent_Variable") %>%
  rename(Correlation = graduation_rate)

# Create heatmap
ggplot(correlation_long, aes(x = reorder(Independent_Variable, -Correlation), 
                              y = "graduation_rate", fill = Correlation)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0) +
  labs(title = "Correlation: Independent Variables vs Graduation Rate",
       x = "Independent Variables",
       y = "Graduation Rate (Dependent Variable)",
       fill = "Correlation") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Which specific indp. variable correlate to benefits?

# Predicting discounts_to_tuition
m_discounts_to_tuition <- lm(discounts_to_tuition ~ team_travel + support_comp + coach_comp + recruiting + equip + meals + facilities + overhead + guarantees, data = means_df)
summary(m_discounts_to_tuition)

Call:
lm(formula = discounts_to_tuition ~ team_travel + support_comp + 
    coach_comp + recruiting + equip + meals + facilities + overhead + 
    guarantees, data = means_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-875.10 -146.16  -18.03  131.93 1092.32 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept)   -12.697     59.372  -0.214  0.83112   
team_travel    24.300      7.742   3.139  0.00227 **
support_comp    6.099      2.549   2.392  0.01873 * 
coach_comp     -2.917      3.270  -0.892  0.37462   
recruiting      6.153     25.867   0.238  0.81250   
equip           5.299      7.911   0.670  0.50462   
meals          -6.337      9.884  -0.641  0.52297   
facilities      3.065      2.101   1.459  0.14787   
overhead       -2.398      2.082  -1.152  0.25233   
guarantees    -31.929     11.059  -2.887  0.00482 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 295.9 on 94 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.6813,    Adjusted R-squared:  0.6508 
F-statistic: 22.33 on 9 and 94 DF,  p-value: < 2.2e-16
# Predicting gifts
model_gifts <- lm(gifts ~ support_comp + meals + coach_comp + team_travel + overhead + equip + medical + aid + recruiting, data = means_df)
summary(model_gifts)

Call:
lm(formula = gifts ~ support_comp + meals + coach_comp + team_travel + 
    overhead + equip + medical + aid + recruiting, data = means_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-479.34  -75.85  -18.33   43.21  825.39 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -27.68643   46.26097  -0.598 0.550957    
support_comp   7.49274    1.91426   3.914 0.000172 ***
meals         16.78122    7.17172   2.340 0.021404 *  
coach_comp     0.43783    2.32151   0.189 0.850814    
team_travel    1.49863    6.20167   0.242 0.809578    
overhead      -1.00422    1.51638  -0.662 0.509431    
equip         -1.81596    5.79703  -0.313 0.754779    
medical        6.98794   10.13380   0.690 0.492165    
aid           -0.07472    1.38160  -0.054 0.956982    
recruiting   -61.30822   18.54020  -3.307 0.001338 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 216.1 on 94 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.6994,    Adjusted R-squared:  0.6706 
F-statistic:  24.3 on 9 and 94 DF,  p-value: < 2.2e-16
summary(means_df$gifts, na.rm = TRUE)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   0.00   46.59  124.84  264.98  286.23 1918.27       1 
# Predicting research_expenditure
model_research_expenditure <- lm(research_expenditure ~ support_comp + coach_comp + team_travel + overhead + aid + meals, data = means_df)
summary(model_research_expenditure)

Call:
lm(formula = research_expenditure ~ support_comp + coach_comp + 
    team_travel + overhead + aid + meals, data = means_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2900.7  -533.2   -40.9   225.7  4007.1 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept)  -305.904    192.880  -1.586   0.1160  
support_comp   12.608      8.549   1.475   0.1435  
coach_comp     -4.037      8.958  -0.451   0.6532  
team_travel    -1.568     26.201  -0.060   0.9524  
overhead       12.287      6.828   1.799   0.0751 .
aid            10.656      6.173   1.726   0.0875 .
meals          12.976     32.437   0.400   0.6900  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 982.8 on 97 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.6512,    Adjusted R-squared:  0.6296 
F-statistic: 30.18 on 6 and 97 DF,  p-value: < 2.2e-16
# Predicting graduation_rate
model_graduation_rate <- lm(graduation_rate ~ spirits + aid + medical + team_travel + recruiting, data = means_df)
summary(model_graduation_rate)

Call:
lm(formula = graduation_rate ~ spirits + aid + medical + team_travel + 
    recruiting, data = means_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.15519 -0.03960 -0.01249  0.03184  0.17955 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.484e-02  1.146e-02   6.528 2.87e-09 ***
spirits      1.905e-02  5.829e-03   3.268  0.00149 ** 
aid          1.169e-03  3.876e-04   3.016  0.00325 ** 
medical      4.356e-03  2.882e-03   1.512  0.13383    
team_travel -6.448e-05  1.545e-03  -0.042  0.96679    
recruiting  -1.092e-02  4.422e-03  -2.470  0.01522 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.06423 on 99 degrees of freedom
Multiple R-squared:  0.308, Adjusted R-squared:  0.273 
F-statistic: 8.811 on 5 and 99 DF,  p-value: 6.062e-07